knitr::opts_knit$set(
    root.dir = '.',
    keep.tex = TRUE
);
knitr::opts_chunk$set(
    fig.width  = 9,
    fig.height = 9,
    fig.align  = 'center',
    fig.pos    = 'h',
    fig.show   = 'hold',
    echo       = TRUE,
    warning    = FALSE,
    message    = FALSE,
    cache      = FALSE
);

Exercises

PCA

1. Load the nutrimouse data from the mixOmics R package and investigate its structure.

library(mixOmics)

A data object provided by an R package can be loaded with data. Its structure can be obainted with str, length, dim, etc.

data("nutrimouse")
## display the structure of the nutrimouse object
str(nutrimouse)
## List of 4
##  $ gene    :'data.frame':    40 obs. of  120 variables:
##   ..$ X36b4    : num [1:40] -0.42 -0.44 -0.48 -0.45 -0.42 -0.43 -0.53 -0.49 -0.36 -0.5 ...
##   ..$ ACAT1    : num [1:40] -0.65 -0.68 -0.74 -0.69 -0.71 -0.69 -0.62 -0.69 -0.66 -0.62 ...
##   ..$ ACAT2    : num [1:40] -0.84 -0.91 -1.1 -0.65 -0.54 -0.8 -1 -0.91 -0.74 -0.79 ...
##   ..$ ACBP     : num [1:40] -0.34 -0.32 -0.46 -0.41 -0.38 -0.32 -0.44 -0.37 -0.39 -0.36 ...
##   ..$ ACC1     : num [1:40] -1.29 -1.23 -1.3 -1.26 -1.21 -1.13 -1.22 -1.29 -1.15 -1.21 ...
##   ..$ ACC2     : num [1:40] -1.13 -1.06 -1.09 -1.09 -0.89 -0.79 -1 -1.06 -1.08 -0.82 ...
##   ..$ ACOTH    : num [1:40] -0.93 -0.99 -1.06 -0.93 -1 -0.93 -0.94 -1.05 -0.88 -0.92 ...
##   ..$ ADISP    : num [1:40] -0.98 -0.97 -1.08 -1.02 -0.95 -0.97 -0.94 -1.02 -0.98 -0.99 ...
##   ..$ ADSS1    : num [1:40] -1.19 -1 -1.18 -1.07 -1.08 -1.07 -1.05 -1.16 -1.05 -1 ...
##   ..$ ALDH3    : num [1:40] -0.68 -0.62 -0.75 -0.71 -0.76 -0.75 -0.67 -0.75 -0.66 -0.69 ...
##   ..$ AM2R     : num [1:40] -0.59 -0.58 -0.66 -0.65 -0.59 -0.55 -0.66 -0.66 -0.53 -0.62 ...
##   ..$ AOX      : num [1:40] -0.16 -0.12 -0.16 -0.17 -0.31 -0.23 -0.09 -0.22 -0.06 -0.23 ...
##   ..$ BACT     : num [1:40] -0.22 -0.32 -0.32 -0.32 -0.31 -0.29 -0.25 -0.21 -0.15 -0.2 ...
##   ..$ BIEN     : num [1:40] -0.89 -0.88 -0.89 -0.77 -0.97 -0.84 -0.86 -0.9 -0.74 -0.76 ...
##   ..$ BSEP     : num [1:40] -0.69 -0.6 -0.7 -0.67 -0.68 -0.55 -0.67 -0.66 -0.6 -0.58 ...
##   ..$ Bcl.3    : num [1:40] -1.18 -1.07 -1.17 -1.12 -0.93 -1.08 -1.03 -1.01 -1.01 -1.1 ...
##   ..$ C16SR    : num [1:40] 1.66 1.65 1.57 1.61 1.66 1.7 1.58 1.62 1.72 1.55 ...
##   ..$ CACP     : num [1:40] -0.92 -0.87 -1.02 -0.89 -0.93 -0.97 -0.97 -0.96 -0.85 -0.95 ...
##   ..$ CAR1     : num [1:40] -0.97 -0.92 -0.98 -0.97 -1.06 -1.03 -0.91 -1.11 -0.85 -0.99 ...
##   ..$ CBS      : num [1:40] -0.26 -0.36 -0.4 -0.39 -0.35 -0.31 -0.32 -0.4 -0.26 -0.39 ...
##   ..$ CIDEA    : num [1:40] -1.21 -1.17 -1.29 -1.18 -1.15 -1.14 -1.16 -1.26 -1.12 -1.08 ...
##   ..$ COX1     : num [1:40] -1.11 -1.06 -1.17 -1.03 -0.99 -1.03 -1.15 -1.18 -0.94 -1.07 ...
##   ..$ COX2     : num [1:40] -1.18 -1.06 -1.14 -1.13 -1.1 -1.16 -1.06 -1.24 -1.23 -1.09 ...
##   ..$ CPT2     : num [1:40] -0.87 -0.87 -0.95 -0.88 -0.91 -0.92 -0.86 -0.93 -0.82 -0.88 ...
##   ..$ CYP24    : num [1:40] -1.37 -1.14 -1.3 -1.27 -1.2 -1.11 -1.12 -1.3 -1.14 -1.08 ...
##   ..$ CYP26    : num [1:40] -1.21 -1.12 -1.22 -1.18 -1.16 -1.1 -1.07 -1.23 -1.1 -1.1 ...
##   ..$ CYP27a1  : num [1:40] -0.71 -0.62 -0.78 -0.71 -0.69 -0.6 -0.69 -0.81 -0.62 -0.62 ...
##   ..$ CYP27b1  : num [1:40] -1.31 -1.14 -1.29 -1.27 -1.2 -1.15 -1.17 -1.28 -1.13 -1.15 ...
##   ..$ CYP2b10  : num [1:40] -1.23 -1.2 -1.32 -1.23 -1.22 -1.1 -1.07 -1.26 -1.19 -1.1 ...
##   ..$ CYP2b13  : num [1:40] -1.19 -1.06 -1.25 -1.13 -1.1 -1.07 -1.2 -1.37 -1.15 -1.11 ...
##   ..$ CYP2c29  : num [1:40] -0.06 -0.2 -0.3 -0.07 -0.29 -0.28 -0.1 -0.1 0.18 -0.33 ...
##   ..$ CYP3A11  : num [1:40] -0.09 -0.34 -0.45 -0.11 -0.51 -0.55 -0.18 -0.25 0.06 -0.4 ...
##   ..$ CYP4A10  : num [1:40] -0.81 -0.88 -0.71 -0.65 -1.16 -0.99 -0.62 -0.82 -0.48 -0.79 ...
##   ..$ CYP4A14  : num [1:40] -0.81 -0.84 -0.98 -0.41 -1.16 -1.09 -0.76 -0.87 -0.37 -0.95 ...
##   ..$ CYP7a    : num [1:40] -0.77 -0.71 -0.93 -0.8 -0.71 -0.74 -0.76 -0.88 -0.77 -0.77 ...
##   ..$ CYP8b1   : num [1:40] -0.77 -0.63 -0.53 -0.73 -0.51 -0.55 -0.57 -0.63 -0.6 -0.66 ...
##   ..$ FAS      : num [1:40] -0.41 -0.37 -0.3 -0.59 -0.06 0.18 -0.16 0.04 -0.53 0.08 ...
##   ..$ FAT      : num [1:40] -1.03 -0.98 -1.03 -1.06 -0.99 -0.99 -0.89 -1.08 -1.04 -0.91 ...
##   ..$ FDFT     : num [1:40] -0.98 -0.92 -1.04 -1 -0.99 -1 -1.02 -0.97 -1.03 -0.95 ...
##   ..$ FXR      : num [1:40] -0.93 -0.87 -1 -0.9 -0.89 -0.89 -0.86 -1.01 -0.81 -0.91 ...
##   ..$ G6PDH    : num [1:40] -1.22 -1.09 -1.28 -1.19 -1.16 -0.96 -1.15 -1.26 -1.13 -1.03 ...
##   ..$ G6Pase   : num [1:40] -0.46 -0.63 -1.06 -0.71 -0.58 -0.49 -0.51 -0.61 -0.38 -0.6 ...
##   ..$ GK       : num [1:40] -0.71 -0.67 -0.68 -0.75 -0.62 -0.59 -0.59 -0.66 -0.68 -0.47 ...
##   ..$ GS       : num [1:40] -1.24 -1.22 -1.36 -1.21 -1.22 -1.16 -1.15 -1.31 -1.16 -1.19 ...
##   ..$ GSTa     : num [1:40] 0 -0.05 -0.13 -0.09 -0.02 -0.11 -0.06 -0.04 0.03 -0.02 ...
##   ..$ GSTmu    : num [1:40] 0.02 -0.05 -0.19 0.03 -0.23 -0.05 -0.22 -0.07 0.23 -0.14 ...
##   ..$ GSTpi2   : num [1:40] 0.45 0.3 0.18 0.36 0.3 0.17 0.12 0.48 0.53 0.01 ...
##   ..$ HMGCoAred: num [1:40] -0.95 -0.86 -0.96 -1.02 -0.7 -0.76 -1 -0.88 -0.96 -0.7 ...
##   ..$ HPNCL    : num [1:40] -0.65 -0.69 -0.75 -0.61 -0.66 -0.56 -0.61 -0.71 -0.53 -0.6 ...
##   ..$ IL.2     : num [1:40] -0.94 -0.94 -1.16 -0.97 -0.93 -0.96 -0.96 -0.85 -0.84 -0.95 ...
##   ..$ L.FABP   : num [1:40] 0.24 0.27 0.17 0.16 0 0.23 0.18 0.18 0.2 0.2 ...
##   ..$ LCE      : num [1:40] 0.09 0.06 -0.05 0.01 -0.07 -0.1 -0.03 -0.08 0.12 -0.1 ...
##   ..$ LDLr     : num [1:40] -0.82 -0.68 -0.82 -0.94 -0.73 -0.74 -0.8 -0.83 -0.81 -0.72 ...
##   ..$ LPK      : num [1:40] -0.32 -0.39 -0.38 -0.38 -0.17 -0.14 -0.35 -0.13 -0.32 -0.24 ...
##   ..$ LPL      : num [1:40] -1.01 -0.97 -1.11 -0.99 -1.05 -0.99 -0.93 -1.07 -0.94 -0.95 ...
##   ..$ LXRa     : num [1:40] -0.82 -0.82 -0.91 -0.85 -0.83 -0.79 -0.77 -0.84 -0.75 -0.78 ...
##   ..$ LXRb     : num [1:40] -1 -0.95 -1.16 -1.01 -1.01 -0.99 -0.98 -1.04 -0.98 -0.99 ...
##   ..$ Lpin     : num [1:40] -0.87 -0.97 -0.95 -1 -0.57 -0.51 -0.81 -0.83 -0.83 -0.48 ...
##   ..$ Lpin1    : num [1:40] -0.85 -0.99 -0.94 -1.02 -0.53 -0.51 -0.81 -0.87 -0.82 -0.49 ...
##   ..$ Lpin2    : num [1:40] -0.85 -0.87 -0.9 -0.88 -0.72 -0.68 -0.8 -0.9 -0.68 -0.67 ...
##   ..$ Lpin3    : num [1:40] -1.23 -1.12 -1.25 -1.18 -1.12 -1.09 -1.04 -1.23 -1.13 -1.11 ...
##   ..$ M.CPT1   : num [1:40] -1.15 -1.06 -1.26 -1.1 -1.11 -1.14 -1.08 -1.19 -1.06 -1.09 ...
##   ..$ MCAD     : num [1:40] -0.6 -0.62 -0.7 -0.59 -0.69 -0.66 -0.53 -0.66 -0.45 -0.62 ...
##   ..$ MDR1     : num [1:40] -1.15 -1.1 -1.26 -1.13 -1.11 -1.09 -1.09 -1.19 -1.06 -1.1 ...
##   ..$ MDR2     : num [1:40] -0.77 -0.65 -0.86 -0.77 -0.7 -0.69 -0.81 -0.81 -0.69 -0.75 ...
##   ..$ MRP6     : num [1:40] -0.99 -0.85 -0.9 -0.95 -0.91 -0.84 -0.88 -1.02 -0.83 -0.86 ...
##   ..$ MS       : num [1:40] -1.11 -1.06 -1.2 -1.09 -1.09 -1.09 -0.99 -1.16 -1.06 -0.98 ...
##   ..$ MTHFR    : num [1:40] -0.96 -0.99 -1.1 -0.95 -0.93 -0.96 -0.88 -1.03 -1.01 -0.95 ...
##   ..$ NGFiB    : num [1:40] -1.21 -1.08 -1.24 -1.12 -1.11 -1.04 -1.02 -1.21 -1.11 -1.04 ...
##   ..$ NURR1    : num [1:40] -1.21 -1.1 -1.32 -1.11 -1.14 -1.18 -1.1 -1.26 -1.14 -1.09 ...
##   ..$ Ntcp     : num [1:40] -0.49 -0.45 -0.44 -0.54 -0.47 -0.46 -0.55 -0.5 -0.44 -0.43 ...
##   ..$ OCTN2    : num [1:40] -1.15 -1.15 -1.2 -1.17 -1.19 -1.11 -1.08 -1.21 -1.05 -1.08 ...
##   ..$ PAL      : num [1:40] -1.32 -1.25 -1.16 -1.25 -1.24 -1.02 -1.04 -1.27 -0.93 -0.92 ...
##   ..$ PDK4     : num [1:40] -1.16 -1.16 -1.27 -1.16 -1.13 -1.08 -1.14 -1.24 -1.19 -1.04 ...
##   ..$ PECI     : num [1:40] -0.68 -0.69 -0.92 -0.71 -0.83 -0.81 -0.79 -0.85 -0.58 -0.82 ...
##   ..$ PLTP     : num [1:40] -1.1 -0.99 -1.03 -1.08 -0.98 -0.89 -1.05 -1.07 -1.02 -0.85 ...
##   ..$ PMDCI    : num [1:40] -0.52 -0.52 -0.6 -0.52 -0.71 -0.69 -0.55 -0.57 -0.46 -0.69 ...
##   ..$ PON      : num [1:40] -0.52 -0.55 -0.65 -0.64 -0.57 -0.63 -0.56 -0.65 -0.6 -0.64 ...
##   ..$ PPARa    : num [1:40] -0.93 -0.86 -0.95 -0.97 -0.94 -0.95 -0.9 -1.12 -0.88 -0.95 ...
##   ..$ PPARd    : num [1:40] -1.51 -1.59 -1.71 -1.57 -1.53 -1.56 -1.49 -1.57 -1.58 -1.54 ...
##   ..$ PPARg    : num [1:40] -1.06 -1.02 -1.14 -1.05 -1.09 -1.01 -1 -1.13 -0.97 -1.07 ...
##   ..$ PXR      : num [1:40] -0.99 -0.96 -1.1 -0.99 -1 -1.03 -0.93 -1.07 -0.98 -0.96 ...
##   ..$ Pex11a   : num [1:40] -1 -1.02 -1.2 -1 -0.95 -1.07 -1.05 -1.02 -1 -1.01 ...
##   ..$ RARa     : num [1:40] -1.2 -1.06 -1.16 -1.17 -1.15 -1.13 -1.09 -1.24 -1.03 -1.09 ...
##   ..$ RARb2    : num [1:40] -1.19 -1.11 -1.23 -1.16 -1.14 -1.07 -1.09 -1.18 -1.12 -1.1 ...
##   ..$ RXRa     : num [1:40] -0.67 -0.59 -0.68 -0.72 -0.78 -0.62 -0.65 -0.76 -0.55 -0.67 ...
##   ..$ RXRb2    : num [1:40] -0.95 -0.95 -1.07 -0.95 -0.98 -0.94 -0.92 -1.03 -0.94 -0.95 ...
##   ..$ RXRg1    : num [1:40] -1.16 -1.1 -1.21 -1.1 -1.11 -1.03 -1.07 -1.19 -1.05 -1.04 ...
##   ..$ S14      : num [1:40] -0.93 -0.86 -0.84 -1.05 -0.65 -0.4 -0.73 -0.62 -0.99 -0.25 ...
##   ..$ SHP1     : num [1:40] -1.1 -0.97 -1.09 -1.03 -1.13 -0.98 -0.95 -1.21 -0.93 -0.97 ...
##   ..$ SIAT4c   : num [1:40] -1.07 -0.97 -1.04 -0.99 -0.94 -0.93 -0.89 -1.04 -0.93 -0.95 ...
##   ..$ SPI1.1   : num [1:40] 1.19 1.15 1.09 1.07 1.22 1.05 1.15 1.18 1.21 1.04 ...
##   ..$ SR.BI    : num [1:40] -0.84 -0.86 -0.95 -0.95 -1.06 -0.8 -0.83 -1 -0.83 -0.77 ...
##   ..$ THB      : num [1:40] -0.79 -0.85 -0.92 -0.79 -0.84 -0.86 -0.8 -0.86 -0.83 -0.85 ...
##   ..$ THIOL    : num [1:40] -0.18 -0.15 -0.24 -0.15 -0.35 -0.29 -0.22 -0.23 -0.17 -0.18 ...
##   ..$ TRa      : num [1:40] -1.48 -1.46 -1.58 -1.54 -1.46 -1.44 -1.32 -1.56 -1.46 -1.35 ...
##   ..$ TRb      : num [1:40] -1.07 -1 -1.16 -1.11 -1.01 -1 -0.97 -1.08 -1.02 -0.98 ...
##   ..$ Tpalpha  : num [1:40] -0.69 -0.74 -0.81 -0.74 -0.82 -0.76 -0.72 -0.76 -0.65 -0.83 ...
##   ..$ Tpbeta   : num [1:40] -1.11 -1.09 -1.14 -1.04 -1.2 -1.05 -1 -1.16 -0.91 -1.07 ...
##   .. [list output truncated]
##  $ lipid   :'data.frame':    40 obs. of  21 variables:
##   ..$ C14.0   : num [1:40] 0.34 0.38 0.36 0.22 0.37 1.7 0.35 0.34 0.22 1.38 ...
##   ..$ C16.0   : num [1:40] 26.4 24 23.7 25.5 24.8 ...
##   ..$ C18.0   : num [1:40] 10.22 9.93 8.96 8.14 9.63 ...
##   ..$ C16.1n.9: num [1:40] 0.35 0.55 0.55 0.49 0.46 0.66 0.36 0.29 0.44 0.9 ...
##   ..$ C16.1n.7: num [1:40] 3.1 2.54 2.65 2.82 2.85 7.26 3.6 3.27 2.36 7.01 ...
##   ..$ C18.1n.9: num [1:40] 17 20.1 22.9 21.9 21.4 ...
##   ..$ C18.1n.7: num [1:40] 2.41 3.92 3.96 2.52 2.96 8.99 2.15 1.99 1.81 8.85 ...
##   ..$ C20.1n.9: num [1:40] 0.26 0.23 0.26 0 0.3 0.36 0.25 0.31 0 0.21 ...
##   ..$ C20.3n.9: num [1:40] 0 0 0.19 0 0.27 2.89 0 0 0 2.03 ...
##   ..$ C18.2n.6: num [1:40] 8.93 14.98 16.06 13.89 14.55 ...
##   ..$ C18.3n.6: num [1:40] 0 0.3 0.27 0 0.27 2.66 0 0 0 0 ...
##   ..$ C20.2n.6: num [1:40] 0 0.3 0.33 0 0.23 0 0 0 0 0 ...
##   ..$ C20.3n.6: num [1:40] 0.78 1.64 1.51 1.1 1.58 0.81 0.68 0.72 1.07 0.59 ...
##   ..$ C20.4n.6: num [1:40] 3.07 15.34 13.27 3.92 11.85 ...
##   ..$ C22.4n.6: num [1:40] 0 0.58 0.54 0 0.32 0 0 0 0 0 ...
##   ..$ C22.5n.6: num [1:40] 0 2.1 1.77 0 0.44 0.56 0 0 0 0.39 ...
##   ..$ C18.3n.3: num [1:40] 5.97 0 0 0.49 0.42 0 8.4 6.01 0.55 0 ...
##   ..$ C20.3n.3: num [1:40] 0.37 0 0 0 0 0 0.42 0.39 0 0 ...
##   ..$ C20.5n.3: num [1:40] 8.62 0 0 2.99 0.3 0 7.37 7.96 3.13 0 ...
##   ..$ C22.5n.3: num [1:40] 1.75 0.48 0.22 1.04 0.35 2.13 2.05 2.33 1.65 0 ...
##   ..$ C22.6n.3: num [1:40] 10.39 2.61 2.51 14.99 6.69 ...
##  $ diet    : Factor w/ 5 levels "coc","fish","lin",..: 3 5 5 2 4 1 3 3 2 1 ...
##  $ genotype: Factor w/ 2 levels "wt","ppar": 1 1 1 1 1 1 1 1 1 1 ...
## check dimensions
lapply(nutrimouse, dim) # apply function dim to each element in list nutrimouse
## $gene
## [1]  40 120
## 
## $lipid
## [1] 40 21
## 
## $diet
## NULL
## 
## $genotype
## NULL
lapply(nutrimouse, length) # apply function length to each element in list nutrimouse
## $gene
## [1] 120
## 
## $lipid
## [1] 21
## 
## $diet
## [1] 40
## 
## $genotype
## [1] 40

2. Take the gene expression dataset in samples x variables matrix format. Investigate variables’ distribution.

## get gene expression data structure
class(nutrimouse$gene)
## [1] "data.frame"
dim(nutrimouse$gene)
## [1]  40 120
rownames(nutrimouse$gene)[1:10]
##  [1] "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10"
colnames(nutrimouse$gene)[1:10]
##  [1] "X36b4" "ACAT1" "ACAT2" "ACBP"  "ACC1"  "ACC2"  "ACOTH" "ADISP" "ADSS1"
## [10] "ALDH3"
## check if there are missing values
any(is.na(nutrimouse$gene))
## [1] FALSE
## investigate each variable
summary(nutrimouse$gene[, 1:4])
##      X36b4             ACAT1             ACAT2              ACBP        
##  Min.   :-0.5800   Min.   :-0.7500   Min.   :-1.1000   Min.   :-0.6600  
##  1st Qu.:-0.5025   1st Qu.:-0.6900   1st Qu.:-0.8800   1st Qu.:-0.5025  
##  Median :-0.4600   Median :-0.6600   Median :-0.7950   Median :-0.4250  
##  Mean   :-0.4552   Mean   :-0.6552   Mean   :-0.7668   Mean   :-0.4338  
##  3rd Qu.:-0.4200   3rd Qu.:-0.6200   3rd Qu.:-0.6450   3rd Qu.:-0.3550  
##  Max.   :-0.3000   Max.   :-0.5200   Max.   :-0.3900   Max.   :-0.2400
colors <- rainbow(20, alpha=1)
plot(density(scale(nutrimouse$gene[, 1], center=T, scale=F)), 
     col=colors[1], xlim=c(-0.5,0.5), ylim=c(0,8),
     main='Density plot of the first 20 genes')
invisible(sapply(2:20, function(i) {
    lines(density(scale(nutrimouse$gene[, i], center=T, scale=F)), col=colors[i])
}))

3. Perform PCA and investigate variances, sample distribution and variable relationship with plots.

A number of methods in different R packages can perform PCA, e.g. stats::prcomp, stats::princomp, mixOmics::pca, multiblock::pca, psych::principal, FactoMineR::PCA, etc.

pca.res <- prcomp(nutrimouse$gene, center=TRUE, scale.=F)
names(pca.res)
## [1] "sdev"     "rotation" "center"   "scale"    "x"
summary(pca.res)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6     PC7
## Standard deviation     0.6763 0.5064 0.4033 0.28206 0.24164 0.19445 0.17513
## Proportion of Variance 0.3497 0.1961 0.1244 0.06084 0.04465 0.02891 0.02345
## Cumulative Proportion  0.3497 0.5458 0.6702 0.73107 0.77572 0.80463 0.82808
##                            PC8     PC9    PC10    PC11    PC12    PC13    PC14
## Standard deviation     0.16498 0.14796 0.13623 0.13425 0.11505 0.11208 0.11052
## Proportion of Variance 0.02081 0.01674 0.01419 0.01378 0.01012 0.00961 0.00934
## Cumulative Proportion  0.84889 0.86563 0.87983 0.89361 0.90373 0.91333 0.92267
##                           PC15    PC16    PC17    PC18    PC19    PC20    PC21
## Standard deviation     0.10450 0.09952 0.09052 0.08962 0.07914 0.07511 0.07313
## Proportion of Variance 0.00835 0.00757 0.00627 0.00614 0.00479 0.00431 0.00409
## Cumulative Proportion  0.93102 0.93860 0.94486 0.95101 0.95579 0.96011 0.96420
##                           PC22    PC23    PC24    PC25    PC26    PC27    PC28
## Standard deviation     0.06913 0.06708 0.06308 0.06186 0.06029 0.05810 0.05639
## Proportion of Variance 0.00365 0.00344 0.00304 0.00293 0.00278 0.00258 0.00243
## Cumulative Proportion  0.96785 0.97129 0.97434 0.97726 0.98004 0.98262 0.98505
##                           PC29    PC30    PC31    PC32    PC33    PC34    PC35
## Standard deviation     0.05151 0.04984 0.04840 0.04724 0.04602 0.04083 0.03979
## Proportion of Variance 0.00203 0.00190 0.00179 0.00171 0.00162 0.00127 0.00121
## Cumulative Proportion  0.98708 0.98898 0.99077 0.99248 0.99410 0.99538 0.99659
##                           PC36    PC37    PC38    PC39      PC40
## Standard deviation     0.03680 0.03468 0.03282 0.02883 1.865e-15
## Proportion of Variance 0.00104 0.00092 0.00082 0.00064 0.000e+00
## Cumulative Proportion  0.99762 0.99854 0.99936 1.00000 1.000e+00

Variances = eigenvalues of the covariance matrix = (standard deviation)^2.

variances <- pca.res$sdev^2
variances
##  [1] 4.573742e-01 2.564410e-01 1.626804e-01 7.955690e-02 5.838751e-02
##  [6] 3.780991e-02 3.066913e-02 2.721979e-02 2.189256e-02 1.855921e-02
## [11] 1.802243e-02 1.323667e-02 1.256153e-02 1.221509e-02 1.091924e-02
## [16] 9.905054e-03 8.193579e-03 8.032182e-03 6.262595e-03 5.641794e-03
## [21] 5.348430e-03 4.779376e-03 4.500169e-03 3.978795e-03 3.826089e-03
## [26] 3.634453e-03 3.376009e-03 3.179730e-03 2.653212e-03 2.484088e-03
## [31] 2.342963e-03 2.231208e-03 2.117845e-03 1.667351e-03 1.583188e-03
## [36] 1.353925e-03 1.202714e-03 1.076873e-03 8.311648e-04 3.480070e-30

Scree plot: plot of variances.

screeplot(pca.res, npcs=length(variances), type='lines')
screeplot(pca.res, npcs=length(variances), type='barplot')
barplot(variances, xlab='PC', ylab='Variance', names.arg=1:length(variances))

Scree plot on variance percentage.

varPercent <- variances/sum(variances) * 100
barplot(varPercent, xlab='PC', ylab='Percent Variance', names.arg=1:length(varPercent))

Scores: sample coordinates in the new reference (rotated axes or principal components).

scores <- pca.res$x
str(scores)
##  num [1:40, 1:40] -0.5036 -0.6119 0.0596 -0.4686 -0.2457 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:40] "1" "2" "3" "4" ...
##   ..$ : chr [1:40] "PC1" "PC2" "PC3" "PC4" ...

Score plot: plot of sample distribution.

PCx <- "PC1"
PCy <- "PC2"
plot(scores[, PCx], scores[, PCy], xlab=PCx, ylab=PCy, pch=16)
text(scores[, PCx], scores[, PCy]-0.05, rownames(scores), col='blue', cex=0.7)

Loadings: contributions of variables to principal components (eigenvectors of covariance matrix).

loadings <- pca.res$rotation
str(loadings)
##  num [1:120, 1:40] -0.0425 -0.023 -0.0131 -0.107 -0.0618 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:120] "X36b4" "ACAT1" "ACAT2" "ACBP" ...
##   ..$ : chr [1:40] "PC1" "PC2" "PC3" "PC4" ...

Loading plot: plot of variables’ contribution, revealing their relationship.

plot(loadings[, PCx], loadings[, PCy], type='n', main="Loadings")
arrows(0, 0, loadings[, PCx], loadings[, PCy], xlab=PCx, ylab=PCy,
       length=0.1, angle=20, col=rgb(0,0,1,alpha=apply(loadings[, c(PCx, PCy)], 1, norm, "2")))
text(loadings[, PCx], loadings[, PCy], rownames(loadings), col='grey', cex=0.7)

Both score and loading plots can be plotted altogether with the biplot function.

## biplot
biplot(pca.res, expand=1, cex=c(0.5, 0.7), col=c("gray50", "red"))

library(factoextra)
fviz_pca_biplot(pca.res, repel = TRUE,
                col.var = "blue", # Variables color
                habillage = nutrimouse$genotype,
                addEllipses = T,
                legend="none"
                )

4. Visually investigate the sample distribution with coloring by metadata or expression of certain genes.

The samples can be colored with some metadata, e.g genotype or diet,

plot(scores[, PCx], scores[, PCy], main="Scores",
     col=c(1:nlevels(nutrimouse$diet))[nutrimouse$diet],
     pch=c(17,19)[nutrimouse$genotype],
     xlab=paste0(PCx,
                 " (",
                 round((summary(pca.res)$importance)[2, PCx], 2), ")"),
     ylab=paste0(PCy,
                 " (",
                 round((summary(pca.res)$importance)[2, PCy], 2), ")")
)
legend("topright", title="genotype",
       legend=levels(nutrimouse$genotype),
       pch=c(17,19), cex=0.7)
legend("bottomright", title="diet",
       legend=levels(nutrimouse$diet),
       col=c(1:5), cex=0.7, pch=16)

or by some gene expression.

nbreaks <- 5
plot(scores[, PCx], scores[, PCy], xlab=PCx, ylab=PCy, 
     pch=c(17,19)[nutrimouse$genotype], 
     col=colorRampPalette(c('red','blue'))(nbreaks)[as.numeric(cut(nutrimouse$gene$ALDH3,breaks = nbreaks))])

PLS

1. Perform PLS (mixOmics::pls) between gene and lipid. Investigate its output, sample distribution and variable relationship with plots.

pls.res <- pls(X=nutrimouse$gene, Y=nutrimouse$lipid, ncomp=2, scale=TRUE, mode="canonical")
max(abs(scale(nutrimouse$gene, center=T, scale=T) - pls.res$X))
## [1] 1.332268e-15
max(abs(scale(nutrimouse$lipid, center=T, scale=T) - pls.res$Y))
## [1] 8.881784e-16

The sample distribution plot can be performed with variates, sample coordinates in the new reference (rotated axes) for each of the two blocks,

str(pls.res$variates)
## List of 2
##  $ X: num [1:40, 1:2] 6.659 0.614 9.876 4.864 0.934 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:40] "1" "2" "3" "4" ...
##   .. ..$ : chr [1:2] "comp1" "comp2"
##  $ Y: num [1:40, 1:2] 2.33 2.6 1.98 1.94 2.29 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:40] "1" "2" "3" "4" ...
##   .. ..$ : chr [1:2] "comp1" "comp2"
PCx <- "comp1"
PCy <- "comp2"
par(mfrow=c(1,2))
plot(pls.res$variates$X[, PCx], pls.res$variates$X[, PCy], xlab=PCx, ylab=PCy, main="X", type='n')
text(pls.res$variates$X[, PCx], pls.res$variates$X[, PCy], rownames(pls.res$variates$X), col='blue', cex=0.6)
plot(pls.res$variates$Y[, PCx], pls.res$variates$Y[, PCy], xlab=PCx, ylab=PCy, main="Y", type='n')
text(pls.res$variates$Y[, PCx], pls.res$variates$Y[, PCy], rownames(pls.res$variates$Y), col='blue', cex=0.6)

which is also produced with plotIndiv.

plotIndiv(pls.res)

Loading plot: plot of variables’ contribution in each data block to each variate, after deflating more important variates,

par(mfrow=c(1,2), las=2, mar=c(4,8,1,1))
loadings.ind.X <- order(abs(pls.res$loadings$X[, "comp1"]), decreasing = T)
barplot(head(pls.res$loadings$X[loadings.ind.X, "comp1"], 10), main="X", horiz = T, cex.names=0.8)
loadings.ind.Y <- order(abs(pls.res$loadings$Y[, "comp1"]), decreasing = T)
barplot(head(pls.res$loadings$Y[loadings.ind.Y, "comp1"], 10), main="Y", horiz = T, cex.names=0.8)

which is the same as with plotLoadings.

plotLoadings(pls.res, ndisplay = 10)

The plot of variable relationship could be obtained from loadings.star.

names(pls.res$loadings.star) <- c("X", "Y")
colnames(pls.res$loadings.star$X) <- colnames(pls.res$loadings.star$Y) <- c(PCx, PCy)
plot(1,1,type='n',
     xlim=range(c(pls.res$loadings.star$X[, PCx],pls.res$loadings.star$Y[, PCx])), 
     ylim=range(c(pls.res$loadings.star$X[, PCy],pls.res$loadings.star$Y[, PCy])))
arrows(0, 0, pls.res$loadings.star$X[, PCx], pls.res$loadings.star$X[, PCy],
       length=0.1, angle=20, col=rgb(0,0,1,alpha=apply(pls.res$loadings.star$X[, c(PCx, PCy)], 1, norm, "2")))
text(pls.res$loadings.star$X[, PCx], 
     pls.res$loadings.star$X[, PCy], 
     rownames(pls.res$loadings.star$X), col='grey', cex=0.7)
arrows(0, 0, pls.res$loadings.star$Y[, PCx], pls.res$loadings.star$Y[, PCy],
       length=0.1, angle=20, col=rgb(1,0,0,alpha=apply(pls.res$loadings.star$Y[, c(PCx, PCy)], 1, norm, "2")))
text(pls.res$loadings.star$Y[, PCx], 
     pls.res$loadings.star$Y[, PCy], 
     rownames(pls.res$loadings.star$Y), col='grey', cex=0.7)
plotVar(pls.res)

Both sample distribution and variable relationship plot could be done with biplot function.

biplot(pls.res, block="X", ind.names.size=3, var.names.size=2)
biplot(pls.res, block="Y", ind.names.size=3, var.names.size=2)

2. Observe the difference between the two modes regression and canonical of PLS.

pls.reg.res <- pls(X=nutrimouse$gene, Y=nutrimouse$lipid, ncomp=2, scale=TRUE, mode="regression")
pls.can.res <- pls(X=nutrimouse$gene, Y=nutrimouse$lipid, ncomp=2, scale=TRUE, mode="canonical")
par(mfrow=c(2,2))
biplot(pls.reg.res, block="X", ind.names.size=3, var.names.size=2)
biplot(pls.can.res, block="X", ind.names.size=3, var.names.size=2)
biplot(pls.reg.res, block="Y", ind.names.size=3, var.names.size=2)
biplot(pls.can.res, block="Y", ind.names.size=3, var.names.size=2)

3. Perform PLS-DA (mixOmics::plsda) between gene and genotype. Redo PLS-DA using mixOmics::pls and compare the results.

pls.da.res <- plsda(X=nutrimouse$gene, Y=nutrimouse$genotype, ncomp=2, scale=TRUE)
pls.regda.res <- pls(X=nutrimouse$gene, Y=c(0,1)[nutrimouse$genotype], ncomp=2, scale=TRUE, mode="regression")
par(mfrow=c(1,2))
biplot(pls.da.res, block="X", ind.names.size=3, var.names.size=2,
       group=nutrimouse$genotype, col.per.group = c("red", "blue"), legend.title = 'genotype')
biplot(pls.regda.res, block="X", ind.names.size=3, var.names.size=2,
       group=nutrimouse$genotype, col.per.group = c("red", "blue"), legend.title = 'genotype')

4. Perform OPLS-DA (ropls::opls) between gene and genotype. Investigate its output, sample distribution, variable relationship and predictive performance.

(https://bioconductor.org/packages/release/bioc/vignettes/ropls/inst/doc/ropls-vignette.html)

library(ropls)
opls.res <- opls(x=nutrimouse$gene, y=nutrimouse$genotype, predI=1, orthoI=NA, fig.pdfC='none')
## OPLS-DA
## 40 samples x 120 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.615    0.975   0.935 0.083   1   2 0.05 0.05
par(mfrow=c(2,2))
plot(opls.res, typeVc='x-score')
plot(opls.res, typeVc='x-loading')
plot(opls.res, typeVc='overview')
plot(opls.res, typeVc='correlation')

opls.res <- opls(x=nutrimouse$gene, y=nutrimouse$genotype, predI=1, orthoI=NA, permI=100, fig.pdfC='none')
## OPLS-DA
## 40 samples x 120 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total    0.615    0.975   0.935 0.083   1   2 0.01 0.01
plot(opls.res, typeVc='permutation')

opls.res <- opls(x=nutrimouse$gene, y=nutrimouse$genotype, predI=1, orthoI=NA, subset=c(1:13, 21:33), fig.pdfC='none')
## OPLS-DA
## 26 samples x 120 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum)  RMSEE RMSEP pre ort
## Total    0.646    0.983   0.915 0.0704 0.134   1   2
par(mfrow=c(1,2))
plot(opls.res, typeVc='predict-train')
plot(opls.res, typeVc='predict-test')

# confusion matrix on training set
train_id <- getSubsetVi(opls.res)
table(nutrimouse$genotype[train_id], fitted(opls.res))
##       
##        wt ppar
##   wt   13    0
##   ppar  0   13
# confusion matrix on test set
table(nutrimouse$genotype[-train_id], predict(opls.res, nutrimouse$gene[-train_id,]))
##       
##        wt ppar
##   wt    7    0
##   ppar  0    7

More on CCA

1. Perform CCA (mixOmics::rcc) between 20 genes and all lipids. Investigate correlations, sample distribution and variable relationship with plots.

The gene expression data is reduced to 20 genes so that the number of variables is less than the number of samples, to perform an unregularized CCA.

nutrimouse$gene_selected <- nutrimouse$gene[, 1:20]
str(nutrimouse$gene_selected)
## 'data.frame':    40 obs. of  20 variables:
##  $ X36b4: num  -0.42 -0.44 -0.48 -0.45 -0.42 -0.43 -0.53 -0.49 -0.36 -0.5 ...
##  $ ACAT1: num  -0.65 -0.68 -0.74 -0.69 -0.71 -0.69 -0.62 -0.69 -0.66 -0.62 ...
##  $ ACAT2: num  -0.84 -0.91 -1.1 -0.65 -0.54 -0.8 -1 -0.91 -0.74 -0.79 ...
##  $ ACBP : num  -0.34 -0.32 -0.46 -0.41 -0.38 -0.32 -0.44 -0.37 -0.39 -0.36 ...
##  $ ACC1 : num  -1.29 -1.23 -1.3 -1.26 -1.21 -1.13 -1.22 -1.29 -1.15 -1.21 ...
##  $ ACC2 : num  -1.13 -1.06 -1.09 -1.09 -0.89 -0.79 -1 -1.06 -1.08 -0.82 ...
##  $ ACOTH: num  -0.93 -0.99 -1.06 -0.93 -1 -0.93 -0.94 -1.05 -0.88 -0.92 ...
##  $ ADISP: num  -0.98 -0.97 -1.08 -1.02 -0.95 -0.97 -0.94 -1.02 -0.98 -0.99 ...
##  $ ADSS1: num  -1.19 -1 -1.18 -1.07 -1.08 -1.07 -1.05 -1.16 -1.05 -1 ...
##  $ ALDH3: num  -0.68 -0.62 -0.75 -0.71 -0.76 -0.75 -0.67 -0.75 -0.66 -0.69 ...
##  $ AM2R : num  -0.59 -0.58 -0.66 -0.65 -0.59 -0.55 -0.66 -0.66 -0.53 -0.62 ...
##  $ AOX  : num  -0.16 -0.12 -0.16 -0.17 -0.31 -0.23 -0.09 -0.22 -0.06 -0.23 ...
##  $ BACT : num  -0.22 -0.32 -0.32 -0.32 -0.31 -0.29 -0.25 -0.21 -0.15 -0.2 ...
##  $ BIEN : num  -0.89 -0.88 -0.89 -0.77 -0.97 -0.84 -0.86 -0.9 -0.74 -0.76 ...
##  $ BSEP : num  -0.69 -0.6 -0.7 -0.67 -0.68 -0.55 -0.67 -0.66 -0.6 -0.58 ...
##  $ Bcl.3: num  -1.18 -1.07 -1.17 -1.12 -0.93 -1.08 -1.03 -1.01 -1.01 -1.1 ...
##  $ C16SR: num  1.66 1.65 1.57 1.61 1.66 1.7 1.58 1.62 1.72 1.55 ...
##  $ CACP : num  -0.92 -0.87 -1.02 -0.89 -0.93 -0.97 -0.97 -0.96 -0.85 -0.95 ...
##  $ CAR1 : num  -0.97 -0.92 -0.98 -0.97 -1.06 -1.03 -0.91 -1.11 -0.85 -0.99 ...
##  $ CBS  : num  -0.26 -0.36 -0.4 -0.39 -0.35 -0.31 -0.32 -0.4 -0.26 -0.39 ...
cca.res <- rcc(X=nutrimouse$gene_selected, Y=nutrimouse$lipid, ncomp=2)
max(abs(nutrimouse$gene_selected - cca.res$X))
## [1] 0
max(abs(nutrimouse$lipid - cca.res$Y))
## [1] 0
str(cca.res)
## List of 11
##  $ call         : language rcc(X = nutrimouse$gene_selected, Y = nutrimouse$lipid, ncomp = 2)
##  $ X            : num [1:40, 1:20] -0.42 -0.44 -0.48 -0.45 -0.42 -0.43 -0.53 -0.49 -0.36 -0.5 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:40] "1" "2" "3" "4" ...
##   .. ..$ : chr [1:20] "X36b4" "ACAT1" "ACAT2" "ACBP" ...
##  $ Y            : num [1:40, 1:21] 0.34 0.38 0.36 0.22 0.37 1.7 0.35 0.34 0.22 1.38 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:40] "1" "2" "3" "4" ...
##   .. ..$ : chr [1:21] "C14.0" "C16.0" "C18.0" "C16.1n.9" ...
##  $ ncomp        : num 2
##  $ method       : chr "ridge"
##  $ cor          : Named num [1:20] 1 1 0.999 0.996 0.981 ...
##   ..- attr(*, "names")= chr [1:20] "1" "2" "3" "4" ...
##  $ loadings     :List of 2
##   ..$ X: num [1:20, 1:2] 1.408 4.802 3.234 -7.373 -0.724 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr [1:20] "X36b4" "ACAT1" "ACAT2" "ACBP" ...
##   .. .. ..$ : NULL
##   ..$ Y: num [1:21, 1:2] 1.1112 -0.1436 -0.4625 -1.0203 -0.0901 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr [1:21] "C14.0" "C16.0" "C18.0" "C16.1n.9" ...
##   .. .. ..$ : NULL
##  $ variates     :List of 2
##   ..$ X: num [1:40, 1:2] -1.203 -1.25 -0.831 0.338 -0.119 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr [1:40] "1" "2" "3" "4" ...
##   .. .. ..$ : NULL
##   ..$ Y: num [1:40, 1:2] -1.203 -1.25 -0.831 0.338 -0.119 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr [1:40] "1" "2" "3" "4" ...
##   .. .. ..$ : NULL
##  $ names        :List of 4
##   ..$ sample  : chr [1:40] "1" "2" "3" "4" ...
##   ..$ colnames:List of 2
##   .. ..$ X: chr [1:20] "X36b4" "ACAT1" "ACAT2" "ACBP" ...
##   .. ..$ Y: chr [1:21] "C14.0" "C16.0" "C18.0" "C16.1n.9" ...
##   ..$ blocks  : chr [1:2] "X" "Y"
##   ..$ data    : chr [1:2] "nutrimouse$gene_selected" "nutrimouse$lipid"
##  $ lambda       : Named num [1:2] 0 0
##   ..- attr(*, "names")= chr [1:2] "lambda1" "lambda2"
##  $ prop_expl_var:List of 2
##   ..$ X: Named num [1:2] 0.00132 0.0024
##   .. ..- attr(*, "names")= chr [1:2] "comp1" "comp2"
##   ..$ Y: Named num [1:2] 0.0184 0.0299
##   .. ..- attr(*, "names")= chr [1:2] "comp1" "comp2"
##  - attr(*, "class")= chr "rcc"
cca.res$cor
##          1          2          3          4          5          6          7 
## 1.00000000 1.00000000 0.99922446 0.99607902 0.98142435 0.95641141 0.89083472 
##          8          9         10         11         12         13         14 
## 0.88959894 0.78648273 0.76470925 0.75189350 0.66984945 0.63240310 0.53662009 
##         15         16         17         18         19         20 
## 0.49948385 0.34852831 0.33274136 0.27818295 0.22569639 0.03783839

The sample distribution plot can be performed with variates, sample coordinates in the new reference (rotated axes) for each of the two blocks.

str(cca.res$variates)
## List of 2
##  $ X: num [1:40, 1:2] -1.203 -1.25 -0.831 0.338 -0.119 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:40] "1" "2" "3" "4" ...
##   .. ..$ : NULL
##  $ Y: num [1:40, 1:2] -1.203 -1.25 -0.831 0.338 -0.119 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:40] "1" "2" "3" "4" ...
##   .. ..$ : NULL
PCx <- 1
PCy <- 2
par(mfrow=c(1,2), las=1, mar=c(4,3,1,1))
plot(cca.res$variates$X[, PCx], cca.res$variates$X[, PCy], xlab=PCx, ylab=PCy, main="X", type='n')
text(cca.res$variates$X[, PCx], cca.res$variates$X[, PCy], rownames(cca.res$variates$X), col='blue', cex=0.6)
plot(cca.res$variates$Y[, PCx], cca.res$variates$Y[, PCy], xlab=PCx, ylab=PCy, main="Y", type='n')
text(cca.res$variates$Y[, PCx], cca.res$variates$Y[, PCy], rownames(cca.res$variates$Y), col='blue', cex=0.6)
cor(cca.res$variates$X[,1], cca.res$variates$Y[,1])
## [1] 1
cor(cca.res$variates$X[,2], cca.res$variates$Y[,2])
## [1] 1
plotIndiv(cca.res)
plotArrow(cca.res)

Variable relationship is obtained from loadings or with plotVar.

par(mfrow=c(1,2), las=2, mar=c(4,8,1,1))
loadings.ind.X <- order(abs(cca.res$loadings$X[, 1]), decreasing = T)
barplot(head(cca.res$loadings$X[loadings.ind.X, 1], 10), main="X", horiz = T, cex.names=0.8)
loadings.ind.Y <- order(abs(cca.res$loadings$Y[, 1]), decreasing = T)
barplot(head(cca.res$loadings$Y[loadings.ind.Y, 1], 10), main="Y", horiz = T, cex.names=0.8)
max(abs(cca.res$variates$X - scale(cca.res$X, center=T, scale=F) %*% cca.res$loadings$X))
max(abs(cca.res$variates$Y - scale(cca.res$Y, center=T, scale=F) %*% cca.res$loadings$Y))
plotVar(cca.res)

2. Perform CCA with scaled datasets and observe the difference

cca.res.scale <- rcc(X=scale(nutrimouse$gene_selected, center=T, scale=T), 
                     Y=scale(nutrimouse$lipid, center=T, scale=T), ncomp=2)
max(abs(cca.res.scale$cor - cca.res$cor))
## [1] 1.149648e-10
max(abs(cca.res.scale$variates$X - cca.res$variates$X))
## [1] 2.64486
max(abs(cca.res.scale$variates$Y - cca.res$variates$Y))
## [1] 2.64486
max(abs(cca.res.scale$loadings$X - cca.res$loadings$X))
## [1] 7.845927
max(abs(cca.res.scale$loadings$Y - cca.res$loadings$Y))
## [1] 123.5061

3. Perform regularized CCA with all genes and lipids.

rcca.res <- rcc(X=nutrimouse$gene, Y=nutrimouse$lipid, ncomp=2, method="shrinkage")
plotVar(rcca.res)